{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### HMM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "### 定义HMM模型\n",
    "class HMM:\n",
    "    def __init__(self, N, M, pi=None, A=None, B=None):\n",
    "        # 可能的状态数\n",
    "        self.N = N\n",
    "        # 可能的观测数\n",
    "        self.M = M\n",
    "        # 初始状态概率向量\n",
    "        self.pi = pi\n",
    "        # 状态转移概率矩阵\n",
    "        self.A = A\n",
    "        # 观测概率矩阵\n",
    "        self.B = B\n",
    "\n",
    "    # 根据给定的概率分布随机返回数据\n",
    "    def rdistribution(self, dist): \n",
    "        r = np.random.rand()\n",
    "        for ix, p in enumerate(dist):\n",
    "            if r < p: \n",
    "                return ix\n",
    "            r -= p\n",
    "\n",
    "    # 生成HMM观测序列\n",
    "    def generate(self, T):\n",
    "        # 根据初始概率分布生成第一个状态\n",
    "        i = self.rdistribution(self.pi)  \n",
    "        # 生成第一个观测数据\n",
    "        o = self.rdistribution(self.B[i])  \n",
    "        observed_data = [o]\n",
    "        # 遍历生成剩下的状态和观测数据\n",
    "        for _ in range(T-1):        \n",
    "            i = self.rdistribution(self.A[i])\n",
    "            o = self.rdistribution(self.B[i])\n",
    "            observed_data.append(o)\n",
    "        return observed_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 0, 0, 1, 0]\n"
     ]
    }
   ],
   "source": [
    "# 初始状态概率分布\n",
    "pi = np.array([0.25, 0.25, 0.25, 0.25])\n",
    "# 状态转移概率矩阵\n",
    "A = np.array([\n",
    "    [0,  1,  0, 0],\n",
    "    [0.4, 0, 0.6, 0],\n",
    "    [0, 0.4, 0, 0.6],\n",
    "[0, 0, 0.5, 0.5]])\n",
    "# 观测概率矩阵\n",
    "B = np.array([\n",
    "    [0.5, 0.5],\n",
    "    [0.6, 0.4],\n",
    "    [0.2, 0.8],\n",
    "    [0.3, 0.7]])\n",
    "# 可能的状态数和观测数\n",
    "N = 4\n",
    "M = 2\n",
    "# 创建HMM实例\n",
    "hmm = HMM(N, M, pi, A, B)\n",
    "# 生成观测序列\n",
    "print(hmm.generate(5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.01983169125\n"
     ]
    }
   ],
   "source": [
    "### 前向算法计算条件概率\n",
    "def prob_calc(O):\n",
    "    '''\n",
    "    输入：\n",
    "    O：观测序列\n",
    "    输出：\n",
    "    alpha.sum()：条件概率\n",
    "    '''\n",
    "    # 初值\n",
    "    alpha = pi * B[:, O[0]]\n",
    "    # 递推\n",
    "    for o in O[1:]:\n",
    "        alpha_next = np.empty(4)\n",
    "        for j in range(4):\n",
    "            alpha_next[j] = np.sum(A[:,j] * alpha * B[j,o])\n",
    "        alpha = alpha_next\n",
    "    return alpha.sum()\n",
    "\n",
    "# 给定观测\n",
    "O = [1,0,1,0,0]\n",
    "print(prob_calc(O))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 2, 3, 3]\n"
     ]
    }
   ],
   "source": [
    "### 序列标注问题和维特比算法\n",
    "def viterbi_decode(O):\n",
    "    '''\n",
    "    输入：\n",
    "    O：观测序列\n",
    "    输出：\n",
    "    path：最优隐状态路径\n",
    "    '''    \n",
    "    # 序列长度和初始观测\n",
    "    T, o = len(O), O[0]\n",
    "    # 初始化delta变量\n",
    "    delta = pi * B[:, o]\n",
    "    # 初始化varphi变量\n",
    "    varphi = np.zeros((T, 4), dtype=int)\n",
    "    path = [0] * T\n",
    "    # 递推\n",
    "    for i in range(1, T):\n",
    "        delta = delta.reshape(-1, 1)     \n",
    "        tmp = delta * A\n",
    "        varphi[i, :] = np.argmax(tmp, axis=0)\n",
    "        delta = np.max(tmp, axis=0) * B[:, O[i]]\n",
    "    # 终止\n",
    "    path[-1] = np.argmax(delta)\n",
    "    # 回溯最优路径\n",
    "    for i in range(T-1, 0, -1):\n",
    "        path[i-1] = varphi[i, path[i]]\n",
    "    return path\n",
    "\n",
    "# 给定观测序列\n",
    "O = [1,0,1,1,0]\n",
    "# 输出最可能的隐状态序列\n",
    "print(viterbi_decode(O))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
